Also over 550 non-profit orgs and volunteer groups
Budget of $583 Million
Over 5,000 employees
over 500 different species planted
Sources
Department of City Planning
Getting boundaries of city
Show code
# Download and read the NYC City Council district boundaries shapefile.
#
# Downloads the DCP zip archive once (cached under ../data/mp03), locates the
# first .shp file inside it, extracts if needed, and returns the geometry
# re-projected to EPSG:4326 so it matches the tree-points data and web maps.
#
# @param url Location of the DCP city-council boundaries zip archive.
# @return An sf object of council district polygons in EPSG:4326.
download_nycc_boundaries <- function(
    url = "https://s-media.nyc.gov/agencies/dcp/assets/files/zip/data-tools/bytes/city-council/nycc_25c.zip") {
  # Set destination folder and file paths
  dest_dir <- file.path("..", "data", "mp03")
  zip_file <- file.path(dest_dir, basename(url))

  # Create directory if it doesn't exist
  if (!dir.exists(dest_dir)) {
    dir.create(dest_dir, recursive = TRUE)
  }

  # Download zip file only if not already cached on disk
  if (!file.exists(zip_file)) {
    download.file(url, destfile = zip_file, mode = "wb")
  }

  # List contents of the zip to find the .shp file
  zip_contents <- unzip(zip_file, list = TRUE)
  shp_file_rel <- zip_contents$Name[
    grepl("\\.shp$", zip_contents$Name, ignore.case = TRUE)
  ]
  if (length(shp_file_rel) == 0L) {
    stop("No .shp file found in the zip archive.", call. = FALSE)
  }

  # Use the first .shp file found
  shp_file_rel <- shp_file_rel[1]
  shp_file <- file.path(dest_dir, shp_file_rel)

  # Unzip only if the shapefile hasn't been extracted yet
  if (!file.exists(shp_file)) {
    unzip(zip_file, exdir = dest_dir)
  }

  # Read and transform; EPSG:4326 matches the CRS used by the tree points
  sf_data <- sf::st_read(shp_file, quiet = TRUE)
  sf::st_transform(sf_data, crs = 4326)
}

nyc_boundaries <- download_nycc_boundaries()
NYC Open Data
Forestry Tree points
More than one million trees
Show code
# Download NYC street-tree points from the Socrata API in pages and combine
# them into a single sf object in EPSG:4326.
#
# Each page of `limit` rows is cached as a GeoJSON file under ../data/mp03 so
# re-runs do not re-download. Paging stops when a page is empty, unreadable,
# or shorter than `limit` (the last page).
#
# @param base_url Socrata GeoJSON endpoint for the Forestry Tree Points data.
# @param limit Number of rows to request per page.
# @return A combined sf object of tree points (EPSG:4326); an empty sf
#   object if nothing could be downloaded.
download_nyc_tree_points <- function(
    base_url = "https://data.cityofnewyork.us/resource/hn5i-inap.geojson",
    limit = 100000  # rows per request
) {
  # Ensure destination directory exists
  dest_dir <- file.path("..", "data", "mp03")
  if (!dir.exists(dest_dir)) dir.create(dest_dir, recursive = TRUE)

  # format(scientific = FALSE) prevents 100000 from serializing as "1e+05",
  # which Socrata would reject; the same guard was already applied to offset.
  limit_str <- format(limit, scientific = FALSE)

  offset <- 0
  all_files <- list()
  repeat {
    offset_str <- format(offset, scientific = FALSE)
    file_name <- glue::glue("tree_points_{offset_str}.geojson")
    file_path <- file.path(dest_dir, file_name)

    if (!file.exists(file_path)) {
      # Build and perform the request
      resp <- httr2::request(base_url) |>
        httr2::req_url_query(`$limit` = limit_str, `$offset` = offset_str) |>
        httr2::req_user_agent("Baruch College Mini-Project (student) API access") |>
        httr2::req_perform()

      # Save raw response to file
      writeBin(httr2::resp_body_raw(resp), file_path)
    }

    # Read GeoJSON into sf
    sf_data <- tryCatch(
      sf::st_read(file_path, quiet = TRUE),
      error = function(e) {
        message(glue::glue("Failed to read file at offset {offset_str}: {e$message}"))
        NULL
      }
    )

    # Stop if no data
    if (is.null(sf_data) || nrow(sf_data) == 0) break

    # Normalize planteddate column (avoid mixed types across pages)
    if ("planteddate" %in% names(sf_data)) {
      sf_data$planteddate <- as.character(sf_data$planteddate)
    }

    all_files[[length(all_files) + 1]] <- sf_data

    # If we got fewer than the limit, that's the last page
    if (nrow(sf_data) < limit) break
    offset <- offset + limit
  }

  if (length(all_files) == 0) {
    message("No data downloaded.")
    return(sf::st_sf())  # empty sf
  }

  # Combine sf chunks; preserves geometry & CRS
  combined_data <- do.call(rbind, all_files)

  # Make sure CRS is WGS84 (Leaflet expects EPSG:4326)
  if (!is.na(sf::st_crs(combined_data))) {
    combined_data <- sf::st_transform(combined_data, 4326)
  } else {
    sf::st_crs(combined_data) <- 4326
  }
  combined_data
}

tree_points <- download_nyc_tree_points()

# Sample for speed. min() guards against an error when fewer than 400k rows
# were downloaded: slice_sample() errors if n > nrow without replacement.
tree_points_samp <- tree_points |>
  slice_sample(n = min(400000, nrow(tree_points)))
# Filter dead/critical trees
dead_trees <- tree_points_samp |>
  filter(tpcondition == "Dead" | tpcondition == "Critical")

# Count dead trees per district using intersection
dead_counts <- lengths(st_intersects(nyc_boundaries, dead_trees))

# Add counts and compute ratio. pmax() guards against division by zero for
# any district with no sampled trees (same guard as the density chart uses).
dead_ratio <- nyc_boundaries |>
  mutate(
    num_trees = lengths(st_intersects(nyc_boundaries, tree_points_samp)),  # total trees
    n_dead = dead_counts,
    dead_frac = n_dead / pmax(num_trees, 1)
  )

ggplot(dead_ratio) +
  geom_sf(aes(fill = dead_frac), color = "white") +
  scale_fill_gradientn(
    colours = c("#FFFFCC", "darkgreen", "#4D004B"),
    values = scales::rescale(c(0, 0.5, 1)),
    name = "Dead Tree Ratio"
  ) +
  # Outline District 20 (the proposal's target district) in red
  geom_sf(
    data = filter(dead_ratio, CounDist == 20),
    fill = NA, color = "red", size = 1.2
  ) +
  theme_void() +
  labs(title = "Dead Tree Ratio Across Districts")
District Graphs
Show code
# Per-district metrics: tree density (trees per square mile) and the share
# of sampled trees that are dead or critical. Shape_Area is in square feet;
# 27,878,400 sq ft = 1 sq mile. pmax() avoids dividing by zero.
density_boundaries <- nyc_boundaries |>
  mutate(
    num_trees = lengths(st_intersects(nyc_boundaries, tree_points_samp)),
    n_dead = dead_counts,
    tree_density = round(num_trees / (Shape_Area / 27878400), 1),
    dead_frac = n_dead / pmax(num_trees, 1)
  )

# Keep only the four districts compared in the bar charts
plot_data <- density_boundaries |>
  st_drop_geometry() |>
  filter(CounDist %in% c(20, 7, 41, 44)) |>
  select(District = CounDist, tree_density, dead_frac)

# Chart 1: Tree Density
ggplot(
  plot_data,
  aes(x = factor(District), y = tree_density, fill = factor(District))
) +
  geom_col() +
  scale_fill_manual(values = c("#006400", "#228B22", "#32CD32", "lightgreen")) +
  labs(title = "Tree Density", x = "District", y = "Trees per mi²") +
  theme_cowplot() +
  theme(legend.position = "none") +
  scale_y_continuous(expand = c(0, 0))
District Graphs 2
Show code
# Chart 2: Dead Tree Ratio — percentage of each district's sampled trees
# that are dead or in critical condition (darker purple = worse).
ggplot(
  plot_data,
  aes(x = factor(District), y = dead_frac, fill = dead_frac)
) +
  geom_col() +
  scale_fill_gradient(low = "darkgreen", high = "#4B0082") +
  scale_y_continuous(labels = percent_format(accuracy = 1), expand = c(0, 0)) +
  labs(title = "Dead Tree Ratio", x = "District", y = "Ratio") +
  theme_cowplot() +
  theme(legend.position = "none")
Proposal
Improve tree health in district 20
Replace 500 dead or critical condition trees with new ones
Thornless Honeylocust
Swamp White Oak
European Hornbeam
Each costs $3,300 on avg
Project costs $1.65 million
Implementation Plan
Phase 1: Assessment (Month 1)
Verify tree condition data
Phase 2: Removal (Months 2–3)
Safely remove 500 dead or critical trees
Phase 3: Planting (Months 4–6)
Plant suggested or other trees
Phase 4: Maintenance & Monitoring (Months 7–12)
Regular watering and pruning
Condition updates
Limitations
Unfortunately, limitations for this analysis exist.
Only 400,000 rows of data instead of 1 million
Data processing takes too long
Data only includes trees on sidewalks
Few or no park trees are included
Species Identification errors
Maintenance costs don’t cover long-term care
Questions?
2. Sampling & Performance
Memory constraints: using a personal PC with 32 gigabytes of RAM, but the hardware is outdated